Set working directory

setwd("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles")

Load libraries

library(phyloseq)
library(dplyr)
library(ggplot2)
library(viridis)
library(stringr)
library(vegan)
library(RColorBrewer)
library(BiocManager)
BiocManager::install("microbiome")
## 
## The downloaded binary packages are in
##  /var/folders/3x/wmkrzjt576j76t00d3dq37lhgqkkb3/T//Rtmp9CcR5B/downloaded_packages
library(microbiome)
library(knitr)
library(ggpubr)

Load metadata

metadata <-read.table("metadata.txt", sep="\t", header = T, row.names = 1, fill = 1)

Metaxa2

# Load data
metaxa_genus <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/metaxa_genus.txt")

# Create OTU table
OTU_metaxa <- metaxa_genus[,-1]

# Create tax table
tax_table_metaxa <- data.frame(str_split_fixed(data.frame(metaxa_genus) [,1], ";", 6))
colnames(tax_table_metaxa) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus")

# Combine into phyloseq object
metaxa_PHY <- phyloseq(otu_table(OTU_metaxa, taxa_are_rows=TRUE), 
                       tax_table(as.matrix(tax_table_metaxa)), sample_data(metadata))

Add SSU counts into metadata

metadata$SSU_counts <- sample_sums(metaxa_PHY)

Filter samples

# With low quality (BFH24_S142, BH63_S118, FH10_S171)
metaxa_PHY = subset_samples(metaxa_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")

# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
metaxa_PHY_ww = subset_samples(metaxa_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")

Ordinate Metaxa2 result to study wheather the taxonomy can be explained by the variable “country”

# Change into relative data
metaxa_rel_PHY <- transform_sample_counts(metaxa_PHY_ww, function(x) x/sum(x))

# Ordinate and plot data
metaxa_rel_PHY_ord <- ordinate(metaxa_rel_PHY, method = "PCoA", distance = "horn")
p1 <- plot_ordination(metaxa_rel_PHY, metaxa_rel_PHY_ord, color = "country", label = "name")
metaxa.p1 <- p1 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) + geom_point(colour = "black", 
    pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.9, linetype = 1) +
    theme_minimal() + labs(title = "Metaxa2")
metaxa.p1

# Test significance using pair-wise adonis
metaxa_temp <- subset_samples(metaxa_PHY_ww, (country == "Benin" | country == "Finland"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
## 
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## country    1    1.9415 1.94145  9.0971 0.15658  0.001 ***
## Residuals 49   10.4573 0.21341         0.84342           
## Total     50   12.3988                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaxa_temp <- subset_samples(metaxa_PHY_ww, (country == "Burkina Faso" | country == "Finland"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
## 
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## country    1    1.0193 1.01926   4.737 0.09524  0.001 ***
## Residuals 45    9.6826 0.21517         0.90476           
## Total     46   10.7019                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaxa_temp <- subset_samples(metaxa_PHY_ww, (country == "Benin" | country == "Burkina Faso"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
## 
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## country    1    0.8664 0.86641  3.8123 0.0466  0.001 ***
## Residuals 78   17.7271 0.22727         0.9534           
## Total     79   18.5935                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The explanatory variable "country" explains some (~15.7 %) of the difference in the grouping of taxonomy in the comparison between of Benin and Finland.

Metaphlan3

# Modify data in command-line to match sample names in metadata file
## tr '|' ';' <merged_abundance_table.txt > mod_merged_abundance_table.txt
## sed -i 's/_profile//g' mod_merged_abundance_table.txt
## sed -i 's/BFH38-A_S156/BFH38.A_S156/g' mod_merged_abundance_table.txt
## sed -i 's/BFH38-B_S157/BFH38.B_S157/g' mod_merged_abundance_table.txt
## sed -i 's/BH34-A_S98/BH34.A_S98/g' mod_merged_abundance_table.txt
## sed -i 's/BH34-B_S99/BH34.B_S99/g' mod_merged_abundance_table.txt

# Load data
metaphlan <- as.matrix(read.table("mod_merged_abundance_table.txt", fill = 1, header = T, check.names = F))

# Exclude the first column "NCBI_tax_id"
metaphlan <- subset(metaphlan, select = -c(NCBI_tax_id))

# Create OTU table
OTU_metaphlan <- metaphlan[,-1]

# Change values as numeric
class(OTU_metaphlan) <- "numeric"

# Create tax_table
tax_table_metaphlan <- data.frame(str_split_fixed(data.frame(metaphlan) [,1], ";", 7))
colnames(tax_table_metaphlan) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")

# Clean "*__"
tax_table_metaphlan <- apply(tax_table_metaphlan, 2, function(y) (gsub(".__", "", y)))

# Combine into phyloseq object
metaphlan_PHY <- phyloseq(otu_table(OTU_metaphlan, taxa_are_rows = TRUE), tax_table(as.matrix(tax_table_metaphlan)), sample_data(metadata))

Filter smaples

# Virus
metaphlan_PHY <- subset_taxa(metaphlan_PHY, Kingdom != "Virus")

# With low quality (BFH24_S142, BH63_S118, FH10_S171)
metaphlan_PHY = subset_samples(metaxa_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")

# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
metaphlan_PHY_ww = subset_samples(metaphlan_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")

Ordinate Metaphlan3 results to study wheather the taxonomy can be explained by the variable “country”

# Ordinate and plot data
metaphlan_PHY_ord <- ordinate(metaphlan_PHY_ww, method = "PCoA", distance = "horn")
p2 <- plot_ordination(metaphlan_PHY_ww, metaphlan_PHY_ord, color = "country", label = "name")
metaphlan.p2 <- p2 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) + 
    geom_point(colour = "black", pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.9, linetype = 1) +
    theme_minimal() + labs(title = "Metaphlan3")
metaphlan.p2

# Test significance using pair-wise adonis
metaphlan_temp <- subset_samples(metaphlan_PHY_ww, (country == "Benin" | country == "Finland"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
## 
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## country    1    1.9415 1.94145  9.0971 0.15658  0.001 ***
## Residuals 49   10.4573 0.21341         0.84342           
## Total     50   12.3988                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaphlan_temp <- subset_samples(metaphlan_PHY_ww, (country == "Burkina Faso" | country == "Finland"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
## 
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## country    1    1.0193 1.01926   4.737 0.09524  0.001 ***
## Residuals 45    9.6826 0.21517         0.90476           
## Total     46   10.7019                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaphlan_temp <- subset_samples(metaphlan_PHY_ww, (country == "Burkina Faso" | country == "Benin"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
## 
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## country    1    0.8664 0.86641  3.8123 0.0466  0.001 ***
## Residuals 78   17.7271 0.22727         0.9534           
## Total     79   18.5935                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The results are almost identical to the ones from Metaxa2 analysis: The explanatory variable "country" explains some (~15.7 %) of the difference in the grouping of taxonomy in the comparison between of Benin and Finland.

ResFinder

Load data

# Modify mapping output file "ARG_genemat.txt" in command line to match sample names in metadata file
## sed 's/BFH38-A_S156/BFH38.A_S156/g' ARG_genemat.txt > mod_ARG_genemat.txt
## sed -i 's/BFH38-B_S157/BFH38.B_S157/g' mod_ARG_genemat.txt
## sed -i 's/BH34-A_S98/BH34.A_S98/g' mod_ARG_genemat.txt
## sed -i 's/BH34-B_S99/BH34.B_S99/g' mod_ARG_genemat.txt

ARG_genemat <-as.matrix(read.table("mod_ARG_genemat.txt", header= T, check.names = F, row.names = 1))

Load ResFinder gene lengths

# Modify "resfinder.fasta" file so that only hits remain
## seqkit grep -f ARG_genes.txt resfinder.fasta > filtered_resfinder.fasta
# Check if there are correct number of lines
## grep ">" filtered_resfinder.fasta | wc -l
# Print out the gene lengths of these genes into file "lengths_resfinder.txt"
## cat filtered_resfinder.fasta | awk '$0 ~ ">" {if (NR > 1) {print c;} c=0;printf substr($0,2,100) "\t"; } $0 !~ ">" {c+=length($0);} END { print c; }' > lengths_resfinder.txt
# Reorder in excel to match with file "ARG_genemat_norm"
lengths_resfinder <-as.matrix(read.table("lengths_resfinder.txt", header= F, check.names = T, row.names = 1))
colnames(lengths_resfinder) <- c("Length")

HC normalization

# Divide by ResFinder hit gene lengths
ARG_length_norm <- ARG_genemat/lengths_resfinder[, 1]

# Divide by SSU counts and normalize to bacterial 16S rRNA length (1550)
ARG_genemat_norm <- t(t(ARG_length_norm)/metadata$SSU_counts) * 1550

# Check if correct:
identical(ARG_genemat[2020, 4]/metadata$SSU_counts[4], ARG_genemat_norm[2020, 4])
## [1] TRUE
# Save and load again to exclude row.names
write.table(ARG_genemat_norm, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=T, sep = "\t", col.names = T)

ARG_genemat_norm <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=NULL)
ARG_genemat_norm$row.names<-NULL

Normalize ARG counts to SSU

ARG_genemat_norm <- t(t(ARG_genemat)/metadata$SSU_counts)

# Check if correct:
identical(ARG_genemat[2020, 4]/metadata$SSU_counts[10], ARG_genemat_norm[2020, 4])
## [1] TRUE
# Save and load again to exclude row.names
write.table(ARG_genemat_norm, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=T, sep = "\t", col.names = T)

ARG_genemat_norm <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=NULL)
ARG_genemat_norm$row.names<-NULL

Create phyloseq object

# Create tax table
tax_table_resfinder <- read.csv("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/tax_table_resfinder.txt", header=FALSE, sep=";")
colnames(tax_table_resfinder) <- c("Gene", "Class")

# Combine to phyloseq object
resfinder_PHY <- phyloseq(otu_table(ARG_genemat_norm, taxa_are_rows = TRUE), sample_data(metadata), 
    tax_table(as.matrix(tax_table_resfinder)))

Filter samples

# With low quality (BFH24_S142, BH63_S118, FH10_S171)
resfinder_PHY = subset_samples(resfinder_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")

# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
resfinder_PHY_ww = subset_samples(resfinder_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")

Ordinate ResFinder mapping results to study wheather the taxonomy can be explained by the variable “country”

# Ordinate and plot data
resfinder_PHY_ord <- ordinate(resfinder_PHY_ww, method = "PCoA", distance = "horn")
p3 <- plot_ordination(resfinder_PHY_ww, resfinder_PHY_ord, color = "country", label = "name")
resfinder.p3 <- p3 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) + 
    geom_point(colour = "black", pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.9, linetype = 1) +
    theme_minimal() + labs(title = "ResFinder")
resfinder.p3

# Test significance between all pairs
resfinder_temp <- subset_samples(resfinder_PHY_ww, (country == "Finland" | country == "Benin"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
## 
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## country    1     1.561 1.56098  5.4817 0.10062  0.001 ***
## Residuals 49    13.953 0.28476         0.89938           
## Total     50    15.514                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resfinder_temp <- subset_samples(resfinder_PHY_ww, (country == "Burkina Faso" | country == "Benin"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
## 
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## country    1    0.8742 0.87417  3.2986 0.04057  0.004 **
## Residuals 78   20.6710 0.26501         0.95943          
## Total     79   21.5452                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resfinder_temp <- subset_samples(resfinder_PHY_ww, (country == "Finland" | country == "Burkina Faso"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
## 
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp),      permutations = 9999)) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## country    1    1.4168 1.41684  6.4565 0.12547  0.001 ***
## Residuals 45    9.8750 0.21945         0.87453           
## Total     46   11.2919                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The explanatory variable "country" explains some (~10.1 % [HC: ~10.2 %]) of the difference in the grouping of taxonomy in the comparison between of Benin and Finland and some (~12.5 % [HC: ~12.9 %]) between Burkina Faso and Finland.

Diversity indexes by Metaxa2

# Take a look at the indexes (now not the relative data, we'll see...)
metaxa_tab <-microbiome::alpha(metaxa_PHY_ww, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaxa_tab))
observed chao1 diversity_inverse_simpson diversity_gini_simpson diversity_shannon diversity_fisher diversity_coverage evenness_camargo evenness_pielou evenness_simpson evenness_evar evenness_bulla dominance_dbp dominance_dmn dominance_absolute dominance_relative dominance_simpson dominance_core_abundance dominance_gini rarity_log_modulo_skewness rarity_low_abundance rarity_rare_abundance
BFH10_S128 760 996.5654 22.24596 0.9550480 4.081135 134.4811 8 0.8704043 0.6152479 0.0292710 0.2217539 0.2167769 0.1233846 0.2096778 4707 0.1233846 0.0449520 0.9757005 0.9735181 2.061394 0.1459540 0.0042989
BFH11_S129 834 1134.7348 19.13635 0.9477434 4.081370 149.1305 7 0.8896129 0.6067839 0.0229453 0.2232231 0.2180059 0.1565893 0.2524138 6244 0.1565893 0.0522566 0.9798621 0.9715648 2.061395 0.1546583 0.0047649
BFH12_S130 706 961.1532 16.79643 0.9404635 3.909282 129.7812 6 0.9210474 0.5959620 0.0237910 0.2328774 0.2198822 0.1629555 0.2622670 4852 0.1629555 0.0595365 0.9830730 0.9755161 2.061385 0.1430395 0.0043325
BFH13_S131 671 845.3368 14.20372 0.9295959 3.740362 109.8236 5 0.8966893 0.5746650 0.0211680 0.1973915 0.2068136 0.1725040 0.3283068 8511 0.1725040 0.0704041 0.9748267 0.9798373 2.061330 0.1261502 0.0039321
BFH14_S132 665 928.1386 25.68127 0.9610611 4.130331 114.6507 10 0.7772335 0.6354564 0.0386184 0.1981389 0.2150153 0.1175224 0.2025528 4438 0.1175224 0.0389389 0.9744724 0.9764716 2.061387 0.1255991 0.0072558
BFH15_S133 698 878.7059 15.11920 0.9338589 4.018954 118.9171 9 0.8846212 0.6137476 0.0216607 0.2001027 0.2168680 0.2284483 0.2854829 9593 0.2284483 0.0661411 0.9474662 0.9755203 2.061316 0.1207849 0.0298628
# Create data table
metaxa.meta <- meta(metaxa_PHY_ww)
kable(head(metaxa.meta))
name total_seqs country location sample_type source
BFH10_S128 BFH10_S128 31970832 Burkina Faso Clinique SOUKA waste water delivery room, surgery
BFH11_S129 BFH11_S129 31323748 Burkina Faso Clinique SOUKA waste water laboratory
BFH12_S130 BFH12_S130 24455299 Burkina Faso Clinique SOUKA waste water peadiatrics
BFH13_S131 BFH13_S131 31789060 Burkina Faso Source de Vie, Ouagadougou waste water delivery room
BFH14_S132 BFH14_S132 33699519 Burkina Faso Source de Vie, Ouagadougou waste water hospitalization, maternity
BFH15_S133 BFH15_S133 30866760 Burkina Faso Source de Vie, Ouagadougou waste water hospitalization, maternity, surgery
# Add indexes to data table
## Shannon
metaxa.meta$diversity_shannon <- metaxa_tab$diversity_shannon
## Gini Simpson
metaxa.meta$diversity_gini_simpson <- metaxa_tab$diversity_gini_simpson
## Inverse Simpson
metaxa.meta$diversity_inverse_simpson <- metaxa_tab$diversity_inverse_simpson

# Check whether the distribution of the diversity is normal
hist(metaxa.meta$diversity_shannon)

shapiro.test(metaxa.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
## 
##  Shapiro-Wilk normality test
## 
## data:  metaxa.meta$diversity_shannon
## W = 0.93911, p-value = 0.0004099
qqnorm(metaxa.meta$diversity_shannon)

hist(metaxa.meta$diversity_gini_simpson)

shapiro.test(metaxa.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
## 
##  Shapiro-Wilk normality test
## 
## data:  metaxa.meta$diversity_gini_simpson
## W = 0.68269, p-value = 1.428e-12
qqnorm(metaxa.meta$diversity_gini_simpson)

hist(metaxa.meta$diversity_inverse_simpson)

shapiro.test(metaxa.meta$diversity_inverse_simpson)
## 
##  Shapiro-Wilk normality test
## 
## data:  metaxa.meta$diversity_inverse_simpson
## W = 0.98082, p-value = 0.2121
qqnorm(metaxa.meta$diversity_inverse_simpson)

# Create a list of pairwise comparisons
div.country <- levels(metaxa.meta$country)
country.pairs <- combn(seq_along(div.country), 2, simplify = FALSE, FUN = function(i)div.country[i])
print(country.pairs)
## [[1]]
## [1] "Benin"        "Burkina Faso"
## 
## [[2]]
## [1] "Benin"   "Finland"
## 
## [[3]]
## [1] "Burkina Faso" "Finland"
# Plot
## Shannon
p4 <- ggviolin(metaxa.meta, x = "country", y = "diversity_shannon",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p4 <- p4 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test") 
print(p4)

## Gini Simpson
p5 <- ggviolin(metaxa.meta, x = "country", y = "diversity_gini_simpson",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p5 <- p5 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test") 
print(p5)

# The probability that two individuals randomly selected from a sample will belong to same species is higher in Finland than in Benin.

## Inverse Simpson
p6 <- ggviolin(metaxa.meta, x = "country", y = "diversity_inverse_simpson",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p6 <- p6 + stat_compare_means(comparisons = country.pairs, method = "t.test") 
print(p6)

# The alpha diversity is the highest in Benin compared to Burkina Faso and Finland. Of Burkina Faso and Finland the diversity is higher in Burkina Faso. 

Diversity indexes by Metaphlan3

# Take a look at the indexes
metaphlan_tab <-microbiome::alpha(metaphlan_PHY_ww, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaphlan_tab))
observed chao1 diversity_inverse_simpson diversity_gini_simpson diversity_shannon diversity_fisher diversity_coverage evenness_camargo evenness_pielou evenness_simpson evenness_evar evenness_bulla dominance_dbp dominance_dmn dominance_absolute dominance_relative dominance_simpson dominance_core_abundance dominance_gini rarity_log_modulo_skewness rarity_low_abundance rarity_rare_abundance
BFH10_S128 760 996.5654 22.24596 0.9550480 4.081135 134.4811 8 0.8704043 0.6152479 0.0292710 0.2217539 0.2167769 0.1233846 0.2096778 4707 0.1233846 0.0449520 0.9757005 0.9735181 2.061394 0.1459540 0.0042989
BFH11_S129 834 1134.7348 19.13635 0.9477434 4.081370 149.1305 7 0.8896129 0.6067839 0.0229453 0.2232231 0.2180059 0.1565893 0.2524138 6244 0.1565893 0.0522566 0.9798621 0.9715648 2.061395 0.1546583 0.0047649
BFH12_S130 706 961.1532 16.79643 0.9404635 3.909282 129.7812 6 0.9210474 0.5959620 0.0237910 0.2328774 0.2198822 0.1629555 0.2622670 4852 0.1629555 0.0595365 0.9830730 0.9755161 2.061385 0.1430395 0.0043325
BFH13_S131 671 845.3368 14.20372 0.9295959 3.740362 109.8236 5 0.8966893 0.5746650 0.0211680 0.1973915 0.2068136 0.1725040 0.3283068 8511 0.1725040 0.0704041 0.9748267 0.9798373 2.061330 0.1261502 0.0039321
BFH14_S132 665 928.1386 25.68127 0.9610611 4.130331 114.6507 10 0.7772335 0.6354564 0.0386184 0.1981389 0.2150153 0.1175224 0.2025528 4438 0.1175224 0.0389389 0.9744724 0.9764716 2.061387 0.1255991 0.0072558
BFH15_S133 698 878.7059 15.11920 0.9338589 4.018954 118.9171 9 0.8846212 0.6137476 0.0216607 0.2001027 0.2168680 0.2284483 0.2854829 9593 0.2284483 0.0661411 0.9474662 0.9755203 2.061316 0.1207849 0.0298628
# Create data table
metaphlan.meta <- meta(metaphlan_PHY_ww)
kable(head(metaxa.meta))
name total_seqs country location sample_type source diversity_shannon diversity_gini_simpson diversity_inverse_simpson
BFH10_S128 BFH10_S128 31970832 Burkina Faso Clinique SOUKA waste water delivery room, surgery 4.081135 0.9550480 22.24596
BFH11_S129 BFH11_S129 31323748 Burkina Faso Clinique SOUKA waste water laboratory 4.081370 0.9477434 19.13635
BFH12_S130 BFH12_S130 24455299 Burkina Faso Clinique SOUKA waste water peadiatrics 3.909282 0.9404635 16.79643
BFH13_S131 BFH13_S131 31789060 Burkina Faso Source de Vie, Ouagadougou waste water delivery room 3.740362 0.9295959 14.20372
BFH14_S132 BFH14_S132 33699519 Burkina Faso Source de Vie, Ouagadougou waste water hospitalization, maternity 4.130331 0.9610611 25.68127
BFH15_S133 BFH15_S133 30866760 Burkina Faso Source de Vie, Ouagadougou waste water hospitalization, maternity, surgery 4.018954 0.9338589 15.11920
# Add indexes to data table
## Shannon
metaphlan.meta$diversity_shannon <- metaphlan_tab$diversity_shannon
## Gini Simpson
metaphlan.meta$diversity_gini_simpson <- metaphlan_tab$diversity_gini_simpson
## Inverse Simpson
metaphlan.meta$diversity_inverse_simpson <- metaphlan_tab$diversity_inverse_simpson

# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_shannon)

shapiro.test(metaphlan.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
## 
##  Shapiro-Wilk normality test
## 
## data:  metaphlan.meta$diversity_shannon
## W = 0.93911, p-value = 0.0004099
qqnorm(metaphlan.meta$diversity_shannon)

hist(metaphlan.meta$diversity_gini_simpson)

shapiro.test(metaphlan.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
## 
##  Shapiro-Wilk normality test
## 
## data:  metaphlan.meta$diversity_gini_simpson
## W = 0.68269, p-value = 1.428e-12
qqnorm(metaphlan.meta$diversity_gini_simpson)

hist(metaphlan.meta$diversity_inverse_simpson)

shapiro.test(metaphlan.meta$diversity_inverse_simpson) 
## 
##  Shapiro-Wilk normality test
## 
## data:  metaphlan.meta$diversity_inverse_simpson
## W = 0.98082, p-value = 0.2121
qqnorm(metaxa.meta$diversity_inverse_simpson)

# Plot
## Shannon
p7 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_shannon",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p7 <- p7 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test") 
print(p7)

## Gini Simpson
p8 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_gini_simpson",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p8 <- p8 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test") 
print(p8)

# The outlier samole in here is FH5_S166

## Inverse Simpson
p9 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_inverse_simpson",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p9 <- p9 + stat_compare_means(comparisons = country.pairs, method = "t.test") 
print(p9)

# The results are identical to those from Metaxa2.

Diversity indexes by ResFinder

# Shannon
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_ww, index = "diversity_shannon")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
diversity_shannon
BFH12_S130 5.247442
BH09_S83 4.809859
BFH35_S153 5.921721
BSE93_S121 5.053210
BH47_S108 4.892473
BH52_S112 4.670272
## Create data table
resfinder.meta <- meta(resfinder_PHY_ww)
kable(head(resfinder.meta))
name total_seqs country location sample_type source SSU_counts
BFH12_S130 BFH12_S130 24455299 Burkina Faso Clinique SOUKA waste water peadiatrics 29775
BH09_S83 BH09_S83 24778124 Benin Hopital de Zone Calavi waste water surgery room of demanding operations, sump 30132
BFH35_S153 BFH35_S153 30094227 Burkina Faso Yalgado teaching hospital of Ouagadougou waste water operating room, neurosurgery, traumatology 75529
BSE93_S121 BSE93_S121 37658434 Benin Savalou area tap water drinking water in Aglomodjodji village 40428
BH47_S108 BH47_S108 36833527 Benin Hospital Saint Jean waste water hospitalization ward, vaccination room septic tank 47007
BH52_S112 BH52_S112 36821916 Benin Hospital Saint Jean clean water tank for washing hands 25959
## Add indexes to data table
resfinder.meta$diversity_shannon <- resfinder_tab$diversity_shannon

# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_shannon)

shapiro.test(resfinder.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
## 
##  Shapiro-Wilk normality test
## 
## data:  resfinder.meta$diversity_shannon
## W = 0.83837, p-value = 1.923e-08
qqnorm(resfinder.meta$diversity_shannon)

# Plot
p10 <- ggviolin(resfinder.meta, x = "country", y = "diversity_shannon",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p10 <- p10 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test") 
print(p10)

# Gini Simpson
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_ww, index = "diversity_gini_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
diversity_gini_simpson
BFH12_S130 0.9894870
BH09_S83 0.9835953
BFH35_S153 0.9942990
BSE93_S121 0.9820126
BH47_S108 0.9830211
BH52_S112 0.9732776
## Add indexes to data table
resfinder.meta$diversity_gini_simpson <- resfinder_tab$diversity_gini_simpson

# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_gini_simpson)

shapiro.test(resfinder.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
## 
##  Shapiro-Wilk normality test
## 
## data:  resfinder.meta$diversity_gini_simpson
## W = 0.36077, p-value < 2.2e-16
qqnorm(resfinder.meta$diversity_gini_simpson)

# Plot
p11 <- ggviolin(resfinder.meta, x = "country", y = "diversity_gini_simpson",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p11 <- p11 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test") 
print(p11)

# The outlier sample here is FH6-S167

# Inverse Simpson
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_ww, index = "diversity_inverse_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
diversity_inverse_simpson
BFH12_S130 95.12045
BH09_S83 60.95816
BFH35_S153 175.40708
BSE93_S121 55.59435
BH47_S108 58.89676
BH52_S112 37.42173
## Add indexes to data table
resfinder.meta$diversity_inverse_simpson <- resfinder_tab$diversity_inverse_simpson

# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_inverse_simpson)

shapiro.test(metaphlan.meta$diversity_inverse_simpson) 
## 
##  Shapiro-Wilk normality test
## 
## data:  metaphlan.meta$diversity_inverse_simpson
## W = 0.98082, p-value = 0.2121
qqnorm(metaxa.meta$diversity_inverse_simpson)

# Plot
p12 <- ggviolin(resfinder.meta, x = "country", y = "diversity_inverse_simpson",
 add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592")) 
p12 <- p12 + stat_compare_means(comparisons = country.pairs, method = "t.test") 
print(p12)

# The alpha diversity of resistance genes deteced by ResFinder seems slightly higher in Burkina Faso than in Benin and in Finland.